<?
set_time_limit(0);
include_once('../common.inc.php');
include_once(_INC_PATH.'database.php');
$begin = time();
$root = 'http://www.71dp.com';
$s_root = 'http://www.71dp.com/cate/';
$db = new database();

// Crawl listing pages 1..108. For every company linked from a listing page,
// build a tbl_corporation row from its detail page, insert it, and then hand
// the detail page plus every page linked from its pager to comment_insert().
for($i = 1; $i <= 108; $i++)
{
	$url = $s_root."1001_sumreview_{$i}/";
	$html = file_get_contents($url);
	if($html === false)
	{
		// Listing page could not be downloaded; nothing to parse.
		continue;
	}
	if(!preg_match('#<div class="left_table">(.*)<div class="right" style="margin-top:0;">#is',$html,$matches))
	{
		// Expected listing container is absent; nothing to import from this page.
		continue;
	}
	$list_html = $matches[1];unset($matches);
	preg_match_all('#<a href="(/shop/[0-9]+/)" class="company_title_index">(.*)</a>#isU',$list_html,$matches);
	$detail_url_arr = $matches[1];unset($matches);
	foreach($detail_url_arr as $detail_path)
	{
		$detail_html = file_get_contents($root.$detail_path);
		if($detail_html === false)
		{
			// Detail page could not be downloaded; skip this company.
			continue;
		}
		// changeEncode() is defined outside this file; 'g2u' presumably means GBK -> UTF-8 (TODO confirm).
		$detail_html = changeEncode($detail_html,'g2u');
		// Start every company from an empty record so that fields parsed for the
		// previous company cannot survive the array_merge() below.
		$corInfo = array();
		$corInfo['name'] = preg_name($detail_html);
		$corInfo = array_merge($corInfo,getInfoHtml($detail_html));
		$corInfo['description'] = preg_description($detail_html);
		$corInfo['create_time'] = date("Y-m-d H:i:s");
		$corInfo['del_flg'] = '0';
		$corInfo['create_author'] = '87dianping';
		$corInfo['city_id'] = '1';
		$cor_id = $db->insert($corInfo,'tbl_corporation');
		if($cor_id > 0){
			// Reviews shown on the detail page itself.
			comment_insert($detail_html,$cor_id);
			// Reviews on the pages linked from the pager, when the page has one.
			if(preg_match('#<div class="p_bar">(.*)</div>#isU',$detail_html,$matches))
			{
				$page_tag = $matches[1];unset($matches);
				preg_match_all('#<a href="(.*)" class="p_num">(.*)</a>#isU',$page_tag,$page_matches);
				foreach($page_matches[1] as $page_path)
				{
					$page_html = file_get_contents($root.$page_path);
					if($page_html === false)
					{
						continue;
					}
					$page_html = changeEncode($page_html,'g2u');
					comment_insert($page_html,$cor_id);
				}
				unset($page_matches);
			}
		}
	}
}
echo '运行了'.(time()-$begin).'秒<br/>';
echo "finished";
// Company name.
// Looks inside the "company_title_left" div of $html for a
// <font color="#000000"> element and returns its text.
// Returns null when either the div or the font element is not found.
function preg_name($html)
{
	if(!preg_match('#<div class="company_title_left">(.*)</div>#isU',$html,$matches))
	{
		return null;
	}
	$name_html = $matches[1];unset($matches);
	if(!preg_match('#<font color="\#000000">([^<]+)</font>#isU',$name_html,$matches))
	{
		return null;
	}
	return $matches[1];
}
// Company information.
// Cuts out the markup between the "left_table_shop_main_right" div and the
// "company_intro1" div, collects the contents of its "company_info" divs and
// maps them by position: 0 address, 1 type, 2 scale, 3 url, 4 tel, 5 fax.
// Returns an array with those six keys, or an empty array when the section or
// its cells cannot be found.
function getInfoHtml($html)
{
	$infoArr = array();
	if(!preg_match('#<div class="left_table_shop_main_right">(.*)<div class="company_intro1">#isU',$html,$matches))
	{
		return $infoArr;
	}
	$info_html = $matches[1];unset($matches);
	preg_match_all('#<div class="company_info"(.*)>(.*)</div>#isU',$info_html,$matches);
	$cells = $matches[2];unset($matches);
	if(is_array($cells) && !empty($cells))
	{
		// Pages with fewer than six cells get empty strings for the missing ones
		// instead of reading undefined offsets.
		$cells = array_pad($cells,6,'');
		$infoArr['address'] = preg_address($cells[0]);
		$infoArr['type'] = preg_type($cells[1]);
		$infoArr['scale'] = preg_scale($cells[2]);
		$infoArr['url'] = preg_url($cells[3]);
		$infoArr['tel'] = preg_tel_fax($cells[4]);
		$infoArr['fax'] = preg_tel_fax($cells[5]);
	}
	return $infoArr;
}
// Company address.
// Returns the text between the first "</span>" and the next "<!--" in $html,
// or null when that pair is not present.
function preg_address($html)
{
	if(!preg_match('#</span>(.*)<!--#isU',$html,$matches))
	{
		return null;
	}
	return $matches[1];
}
// Company ownership type.
// Maps the label found in $html to a code string:
//   民营 => '0', 外资+欧美 => '1', 外资+非欧美 => '2', 合资+欧美 => '3',
//   合资+非欧美 => '4', 国企 => '5', 外企代表处 => '6', 其它性质 => '7'.
// Returns '' when no known label is present.
function preg_type($html)
{
	if(strpos($html,'民营') !== false){
		return '0';
	}
	// '非欧美' contains '欧美', so the non-Western variant has to be told apart
	// explicitly; testing '欧美' alone would classify both variants the same.
	$hasWestern = strpos($html,'欧美') !== false;
	$hasNonWestern = strpos($html,'非欧美') !== false;
	if(strpos($html,'外资') !== false && $hasWestern){
		return $hasNonWestern ? '2' : '1';
	}
	if(strpos($html,'合资') !== false && $hasWestern){
		return $hasNonWestern ? '4' : '3';
	}
	if(strpos($html,'国企') !== false){
		return '5';
	}
	if(strpos($html,'外企代表处') !== false){
		return '6';
	}
	if(strpos($html,'其它性质') !== false){
		return '7';
	}
	return '';
}
// Company size.
// Maps the label found in $html to a code string:
//   微型 => '0', 小型 => '1', 中小型 => '2', 中型 => '3', 大型 => '4'.
// Returns '' when no known label is present.
function preg_scale($html)
{
	if(strpos($html,'微型') !== false){
		return '0';
	}
	// '中小型' contains '小型', so it must be tested before the shorter label.
	if(strpos($html,'中小型') !== false){
		return '2';
	}
	if(strpos($html,'小型') !== false){
		return '1';
	}
	if(strpos($html,'中型') !== false){
		return '3';
	}
	if(strpos($html,'大型') !== false){
		return '4';
	}
	return '';
}
// Company website.
// Returns the inner content of the first <a ...>...</a> element in $html,
// or null when $html contains no such element.
function preg_url($html)
{
	if(!preg_match('#<a(.*)>(.*)</a>#isU',$html,$matches))
	{
		return null;
	}
	return $matches[2];
}
// Telephone / fax number.
// Two substitutions run in order on $html: every <span ...>...</span> element
// is removed, then every character outside the class [0-9|^-] is removed.
// What remains is therefore made of digits, "|", "^" and "-".
function preg_tel_fax($html)
{
	$strip_patterns = array(
		'#<span(.*)>(.*)</span>#isU',
		'#[^0-9|^-]*#is',
	);
	return preg_replace($strip_patterns,'',$html);
}
// Company description.
// Returns the content of the hidden <span id="shop-content"> element with the
// "[精简]" toggle link removed, or '' when the element is not found.
function preg_description($html)
{
	if(!preg_match('#<span id="shop-content" style="display:none">(.*)</span>#isU',$html,$matches))
	{
		return '';
	}
	return str_replace('<a href="javascript:view_content(\'little\');" >[精简]</a>','',$matches[1]);
}
// Parses every review found in $html and inserts one tbl_estimate row per
// review through the global $db.
//   $html   : markup of a company page (detail page or one of its review pages)
//   $cor_id : value stored in the cor_id column of every inserted row
// Returns nothing.
function comment_insert($html,$cor_id)
{
	global $db;
	// Rating widgets are grouped four at a time, so group N belongs to review N;
	// within a group the positions are read below as salary, environment,
	// opportunity and pressure.
	preg_match_all('#<div class="company_rank_list1_avgsort">(.*)</div>#isU',$html,$matches);
	$starArr = array_chunk($matches[1],4);unset($matches);
	$starArr2 = array();
	foreach($starArr as $key=>$value)
	{
		foreach($value as $kk=>$vv)
		{
			// The digit in class "startN" minus one is the stored score; scores
			// below 1, and widgets without a recognisable class, are stored as 1.
			$point = 1;
			if(preg_match('#<li class="start(\d{1})"([^>]+)></li>#isU',$vv,$matches))
			{
				$point = $matches[1]-1;
				if($point < 1)$point = 1;
			}
			unset($matches);
			$starArr2[$key][$kk] = $point;
		}
	}
	preg_match_all('#<span class="font12_4">(\d{4}-\d{2}-\d{2}\s{1}\d{2}:\d{2}:\d{2})</span>#isU',$html,$matches);
	$create_time = $matches[1];unset($matches);
	preg_match_all('#<div class="review_index_content" style="margin-top:10px;">(.*)</div>#isU',$html,$matches);
	foreach($matches[1] as $key=>$value){
		$stars = isset($starArr2[$key]) ? $starArr2[$key] : array();
		$commentArr = array();
		// The review text without its embedded "font12_4" span.
		$commentArr['content'] = preg_replace('#<span class="font12_4">(.*)</span>#isU','',$value);
		$commentArr['salary'] = isset($stars[0]) ? $stars[0] : null;
		$commentArr['environment'] = isset($stars[1]) ? $stars[1] : null;
		$commentArr['opportunity'] = isset($stars[2]) ? $stars[2] : null;
		$commentArr['pressure'] = isset($stars[3]) ? $stars[3] : null;
		// Keep the scraped timestamp when it is a complete "Y-m-d H:i:s" value.
		// The scraped values include the time of day, so a date-only check would
		// reject every one of them; fall back to the current time only when the
		// value is missing or malformed.
		$review_time = isset($create_time[$key]) ? $create_time[$key] : '';
		if(!preg_match('#^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$#',$review_time))
			$review_time = date("Y-m-d H:i:s");
		$commentArr['create_time'] = $review_time;
		$commentArr['del_flg'] = '0';
		$commentArr['cor_id'] = $cor_id;
		$commentArr['work_time'] = rand(5,25);
		$commentArr['show_flg'] = '1';
		$commentArr['user_name'] = '百企网友';
		$db->insert($commentArr,'tbl_estimate');
	}
}
?>